library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggridges)

load the weather data

# Pull 2017 daily precipitation and min/max temperature for three NOAA
# monitors, attach a readable station name, and rescale the temperatures.
# NOTE(review): dividing by 10 presumably turns tenths of a degree C into
# degrees C; prcp is left in whatever units the API returns -- confirm.
weather_df <-
  rnoaa::meteo_pull_monitors(
    c("USW00094728", "USC00519397", "USS0023B17S"),
    date_min = "2017-01-01",
    date_max = "2017-12-31",
    var = c("PRCP", "TMIN", "TMAX")
  ) %>%
  # Map each station id onto a human-readable label.
  mutate(
    name = recode(
      id,
      USW00094728 = "CentralPark_NY",
      USC00519397 = "Waikiki_HA",
      USS0023B17S = "Waterhole_WA"
    )
  ) %>%
  mutate(
    tmin = tmin / 10,
    tmax = tmax / 10
  ) %>%
  # Put the identifying columns first; keep everything else in place.
  select(name, id, everything())

# Print the tibble to inspect the first rows.
weather_df
## # A tibble: 1,095 x 6
##    name           id          date        prcp  tmax  tmin
##    <chr>          <chr>       <date>     <dbl> <dbl> <dbl>
##  1 CentralPark_NY USW00094728 2017-01-01     0   8.9   4.4
##  2 CentralPark_NY USW00094728 2017-01-02    53   5     2.8
##  3 CentralPark_NY USW00094728 2017-01-03   147   6.1   3.9
##  4 CentralPark_NY USW00094728 2017-01-04     0  11.1   1.1
##  5 CentralPark_NY USW00094728 2017-01-05     0   1.1  -2.7
##  6 CentralPark_NY USW00094728 2017-01-06    13   0.6  -3.8
##  7 CentralPark_NY USW00094728 2017-01-07    81  -3.2  -6.6
##  8 CentralPark_NY USW00094728 2017-01-08     0  -3.8  -8.8
##  9 CentralPark_NY USW00094728 2017-01-09     0  -4.9  -9.9
## 10 CentralPark_NY USW00094728 2017-01-10     0   7.8  -6  
## # … with 1,085 more rows

scatterplot

Create my first scatterplot ever.

# Basic scatterplot: daily minimum temperature (x) against daily maximum (y).
ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax)) +
  geom_point()
## Warning: Removed 15 rows containing missing values (geom_point).

New approach, same plot

# Same scatterplot, this time with the data frame piped into ggplot().
weather_df %>%
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  geom_point()
## Warning: Removed 15 rows containing missing values (geom_point).

save and edit a plot object

# Store the data and aesthetic mapping (no layers yet) as a reusable object.
plot_weather <- ggplot(weather_df, aes(x = tmin, y = tmax))

# Add a semi-transparent point layer to the saved object and draw it.
plot_weather +
  geom_point(alpha = 0.5)
## Warning: Removed 15 rows containing missing values (geom_point).

Advanced scatterplot

Start with the same one and make it fancy

# Colour is mapped in the top-level aes(), so both the points and the
# smooth curves are drawn separately for each station name.
weather_df %>%
  ggplot(aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = 0.5) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

what about the ‘aes’ placement?

# Colour is mapped only inside geom_point(), so the points are coloured by
# station while geom_smooth() sees no colour mapping of its own.
weather_df %>%
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(mapping = aes(color = name)) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

facet same thing

# Coloured scatter + smooth as before, split into one column panel per station.
weather_df %>%
  ggplot(aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = 0.5) +
  geom_smooth(se = FALSE) +
  facet_grid(cols = vars(name))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

combine some elements and try a new plot

# Maximum temperature over the year, one panel per station; point size
# is mapped to that day's precipitation.
weather_df %>%
  ggplot(aes(x = date, y = tmax, color = name)) +
  geom_point(mapping = aes(size = prcp), alpha = 0.5) +
  geom_smooth(se = FALSE) +
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).

Write a code chain that starts with weather_df; focuses only on Central Park, converts temperatures to Fahrenheit, makes a scatterplot of min vs. max temperature, and overlays a linear regression line (using options in geom_smooth()).

# Central Park only: convert both temperatures from Celsius to Fahrenheit,
# then plot min vs. max with a straight-line (lm) fit and no error band.
weather_df %>%
  filter(name == "CentralPark_NY") %>%
  mutate(
    tmax_fahr = tmax * (9 / 5) + 32,
    tmin_fahr = tmin * (9 / 5) + 32
  ) %>%
  ggplot(mapping = aes(x = tmin_fahr, y = tmax_fahr)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

Looks like there’s a pretty linear relationship between min and max temperatures in Central Park.

some small notes

how many geoms have to exist?

You can have whatever geoms you want

# A plot does not need a point layer: draw only the per-station smooths.
weather_df %>%
  ggplot(aes(x = tmin, y = tmax, color = name)) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).

You can use a neat geom!

# Hexagonal binning of tmin vs. tmax instead of individual points.
weather_df %>%
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  geom_hex()
## Warning: Removed 15 rows containing non-finite values (stat_binhex).

# Rectangular 2-D binning of the same two variables.
weather_df %>%
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  geom_bin2d()
## Warning: Removed 15 rows containing non-finite values (stat_bin2d).

# 2-D density contours first, then faint points layered on top of them.
weather_df %>%
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  geom_density_2d() +
  geom_point(alpha = 0.3)
## Warning: Removed 15 rows containing non-finite values (stat_density2d).
## Warning: Removed 15 rows containing missing values (geom_point).

# color is given outside aes(), i.e. as a fixed setting for the whole layer.
ggplot(data = weather_df) +
  geom_point(
    mapping = aes(x = tmax, y = tmin),
    color = "blue"
  )
## Warning: Removed 15 rows containing missing values (geom_point).

# Here color = "blue" sits INSIDE aes(), so it is treated as a mapped value
# (a constant string fed through the colour scale) rather than as the literal
# colour blue. Kept as written: it looks like an intentional contrast with
# the fixed-colour version of this plot.
ggplot(data = weather_df) +
  geom_point(
    mapping = aes(x = tmax, y = tmin, color = "blue")
  )
## Warning: Removed 15 rows containing missing values (geom_point).

univariate plots

histograms

# Histogram of daily minimum temperature using the default binning.
weather_df %>%
  ggplot(mapping = aes(x = tmin)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 15 rows containing non-finite values (stat_bin).

add color

# Fill by station and place the bars for each station side by side.
weather_df %>%
  ggplot(mapping = aes(x = tmin, fill = name)) +
  geom_histogram(position = "dodge")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 15 rows containing non-finite values (stat_bin).

# Same filled histogram, with each station drawn in its own column panel.
weather_df %>%
  ggplot(mapping = aes(x = tmin, fill = name)) +
  geom_histogram(position = "dodge") +
  facet_grid(cols = vars(name))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 15 rows containing non-finite values (stat_bin).

new geometry

# Overlapping density curves of tmax, filled by station; adjust = 0.5 is
# passed to geom_density() and the outline colour is fixed to blue.
weather_df %>%
  ggplot(mapping = aes(x = tmax, fill = name)) +
  geom_density(
    alpha = 0.4,
    adjust = 0.5,
    color = "blue"
  )
## Warning: Removed 3 rows containing non-finite values (stat_density).

box plot

# One boxplot of maximum temperature per station.
weather_df %>%
  ggplot(mapping = aes(x = name, y = tmax)) +
  geom_boxplot()
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).

trendy plots

# Violin plot of maximum temperature per station with the median marked.
# stat_summary() defaults to a pointrange geom, which needs ymin/ymax; with
# only `fun` supplied those are missing and ggplot2 drops the segments with a
# "missing values (geom_segment)" warning. Asking for a point geom draws the
# median marker directly and avoids that spurious warning.
weather_df %>%
  ggplot(aes(x = name, y = tmax)) +
  geom_violin(aes(fill = name), alpha = 0.5) +
  stat_summary(fun = "median", geom = "point", color = "blue")
## Warning: Removed 3 rows containing non-finite values (stat_ydensity).
## Warning: Removed 3 rows containing non-finite values (stat_summary).

ridgeplots

# Ridgeline densities of tmax, one ridge per station (from ggridges).
weather_df %>%
  ggplot(mapping = aes(x = tmax, y = name)) +
  geom_density_ridges(scale = 0.85)
## Picking joint bandwidth of 1.84
## Warning: Removed 3 rows containing non-finite values (stat_density_ridges).